
package com.jstarcraft.ai.jsat.clustering;

import java.util.ArrayList;
import java.util.List;

import com.jstarcraft.ai.jsat.DataSet;
import com.jstarcraft.ai.jsat.classifiers.DataPoint;

/**
 * A base foundation that provides an implementation of
 * {@link #cluster(com.jstarcraft.ai.jsat.DataSet) } and
 * {@link #cluster(com.jstarcraft.ai.jsat.DataSet, java.util.concurrent.ExecutorService) }
 * using their int array counterparts. <br>
 * <br>
 * By default it is assumed that a clusterer does not support weighted data. If
 * this is incorrect, you need to override the {@link #supportsWeightedData() }
 * method.
 *
 * @author Edward Raff
 */
public abstract class ClustererBase implements Clusterer {

    private static final long serialVersionUID = 4359554809306681680L;

    /**
     * Converts a flat array of cluster assignments into a list of lists, one
     * inner list per cluster id.
     * <br>
     * The outer list is grown on demand so that it always has an entry for
     * every id from {@code 0} up to the largest non-negative assignment seen so
     * far; ids that receive no points end up with an empty inner list. Points
     * whose assignment is negative are skipped and appear in no list.
     *
     * @param assignments the array containing cluster assignments, indexed in
     *                    the same order as the data set
     * @param dataSet     the original data set, with data in the same order as
     *                    was used to create the assignments array
     * @return a List of lists where the list at position {@code k} holds the
     *         data points assigned to cluster {@code k}
     */
    public static List<List<DataPoint>> createClusterListFromAssignmentArray(int[] assignments, DataSet dataSet) {
        final List<List<DataPoint>> byCluster = new ArrayList<>();

        for (int idx = 0; idx < dataSet.size(); idx++) {
            final int clusterId = assignments[idx];
            if (clusterId < 0) {
                // negative ids never get a bucket and the point is left out
                continue;
            }

            // make sure a bucket exists for this id (and all smaller ids)
            while (byCluster.size() <= clusterId) {
                byCluster.add(new ArrayList<>());
            }
            byCluster.get(clusterId).add(dataSet.getDataPoint(idx));
        }

        return byCluster;
    }

    /**
     * Collects the data points of a data set that were assigned to one specific
     * cluster, optionally recording where each one came from.
     *
     * @param c           the cluster ID to get the datapoints for
     * @param assignments the array containing cluster assignments
     * @param dataSet     the data set to get the points from
     * @param indexFrom   if not {@code null}, receives the original data set
     *                    index of every returned point, so that the item at
     *                    index {@code i} of the returned list is found in the
     *                    data set at index {@code indexFrom[i]}. It must have
     *                    room for one entry per matching point. May be
     *                    {@code null}, in which case no indices are recorded
     * @return a list of the datapoints that were assigned to the designated
     *         cluster, in data set order
     */
    public static List<DataPoint> getDatapointsFromCluster(int c, int[] assignments, DataSet dataSet, int[] indexFrom) {
        final List<DataPoint> members = new ArrayList<>();
        final boolean recordIndices = indexFrom != null;
        int written = 0;

        for (int idx = 0; idx < dataSet.size(); idx++) {
            if (assignments[idx] != c) {
                continue;
            }

            members.add(dataSet.getDataPoint(idx));
            if (recordIndices) {
                indexFrom[written] = idx;
                written++;
            }
        }

        return members;
    }

    /**
     * Declared abstract so that every concrete subclass has to provide its own
     * implementation, with the result typed as a {@link Clusterer}.
     *
     * @return a {@link Clusterer} produced by the subclass
     */
    @Override
    public abstract Clusterer clone();

}
